* Session setup: start from an empty workspace, close any log left open by a
* previous run, switch off tracing and output paging, and pick the "sj" scheme.
clear all
capture log close
set trace off
set more off
set scheme sj


**************************************
* Generate cohort-experience U-rates *
**************************************
* Declares a region_entry x year panel, smooths U_internal with forward-looking
* moving averages, shifts the smoothed series ahead, and saves the result as
* "$temp/temp_cohort" for the merge in the analysis section.
use "$temp/U_internal.dta", clear
xtset region_entry year

* window(0 1 k): no lagged terms, the current observation, and k leads
tssmooth ma U01  = U_internal, window(0 1 1)
tssmooth ma U012 = U_internal, window(0 1 2)

* Lead the smoothed series: U23 is U01 two years ahead, U345 is U012 three years ahead
generate U23  = F2.U01
generate U345 = F3.U012

save "$temp/temp_cohort", replace

********************************
* Start usual analysis program *
********************************

* Remove the panel declaration that was only needed for the lead operators
xtset, clear

* AKM-value window(s) to loop over (space-separated list)
local win "w_3_stack"

foreach w in `win' {
* Make the window of this iteration available as a global
global windows "`w'"

* Unemployment-rate measures: $U is interacted with potential experience in the
* regressions, $U_med supplies the mediation regressors
global U     "U012"
global U_med "U345"

* One log file per configuration (any previously open log is closed first)
capture log close
log using "$log/08_06_dom_job_analysis_${windows}_mediation_${S_DATE}_BC_U_${U}_U_med_${U_med}.log", replace

********************************************************************************
* Load Data and generate log based on specific options *
********************************************************************************

* Stamp the log with date, time and the chosen options
display "$S_DATE" " " "$S_TIME"
display "Unemployment rate is $U + mediation, windows for FE/Value analysis are $windows "

* Load only the variables needed for the analysis (each variable listed once)
use train_start train_length year_entry region_entry german gender ///
	train_end_age month_entry train_edu wage_ft aearnings aearnings_ft ///
	spell_length_ft work_at_train work_in_train_occ work_in_train_ind ///
	potential_experience year dom_job train_occ persnr train_ind wage ///
	current_ind region_current empl_current U_internal dom_job_ft betnr ///
	beruf certificate same_train_occ no_trainings ///
	using "$temp/analysis_pt_new", clear

***********************************************
* MERGE IN COHORT-SPECIFIC UNEMPLOYMENT RATES *
***********************************************
* Only matched observations are kept.
* NOTE(review): the key is region_entry x year_entry, so temp_cohort (built from
* U_internal.dta) must contain year_entry -- confirm against that file.
merge m:1 region_entry year_entry using "$temp/temp_cohort", keep(3) nogen

* The temporary file is needed again on every pass of the window loop, so it is
* removed only while processing the last window in `win'. Erasing it
* unconditionally would break the merge above as soon as `win' holds more than
* one window.
if "`w'" == word("`win'", -1) {
	cap erase "$temp/temp_cohort.dta"
}

********************************************************************************
* Merge in chosen AKM-Values
********************************************************************************
* ffe / re / cod collect firm_fe / rent / cd from the stacked estimation files;
* the file for centre year t supplies the values for years t-1, t and t+1.
gen ffe = .
gen re = .
gen cod = .

	if "$windows"=="w_3_stack" {
		forval t = 1999(3)2017 {	
			merge m:1 betnr using "$FE//firmfe_value_new_ver`t'3_bc.dta", keep(1 3) nogen
			replace ffe = firm_fe if year>=`t'-1 & year<=`t'+1
			replace re = rent if year>=`t'-1 & year<=`t'+1
			replace cod = cd if year>=`t'-1 & year<=`t'+1
			drop firm_fe rent cd
		}		
	}
	else {
		* Without AKM values every observation would be dropped below and the
		* program would fail much later with an unrelated error; stop here.
		display as error "AKM-value window '$windows' is not supported (expected w_3_stack)"
		exit 198
	}

rename ffe firm_fe
rename re rents
rename cod cd

* Analysis sample: observations with AKM values whose dom_job_ft flag equals 1
keep if !missing(firm_fe)
keep if !missing(cd)
keep if dom_job_ft == 1 

********************************************************************************
* Deflation of Nominal Variables *
********************************************************************************
* The CPI is filled year by year from two value lists (one value per consecutive
* year). Years not covered keep a missing CPI, so their deflated values are missing.

gen cpi=.
label variable cpi "Consumer Price Index"

*Consumer Price Index West Germany 1975 - 1991 (base year 1995)
local yr = 1975
foreach idx in 54.5 56.8 58.9 60.5 63.0 66.4 70.6 74.3 76.7 78.6 80.2 80.1 80.3 81.3 83.6 85.8 89.0 {
	replace cpi = `idx' if year == `yr'
	local ++yr
}

*Set 2015 as base year for the period 1975 - 1991
* (must run before the later years are filled in, so only 1975 - 1991 is rescaled)
replace cpi = (cpi/89.0)*65.5		// see Statistisches Bundesamt (2019)

*Consumer Price Index Germany (West and East, after 1991), 1992 - 2019
local yr = 1992
foreach idx in 68.8 71.9 73.8 75.1 76.1 77.6 78.3 78.8 79.9 81.5 82.6 83.5 84.9 86.2 87.6 89.6 91.9 92.2 93.2 95.2 97.1 98.5 99.5 100.0 100.5 102.0 103.8 105.3 {
	replace cpi = `idx' if year == `yr'
	local ++yr
}

*Deflate wages and earnings (base year 2015)
gen wage_ft_defl      = 100 * wage_ft / cpi
gen wage_defl         = 100 * wage / cpi
gen aearnings_ft_defl = 100 * aearnings_ft / cpi
gen aearnings_defl    = 100 * aearnings / cpi
drop cpi

********************************************************************************
* Define Main Analysis Sample 
********************************************************************************
* Keep train_edu codes 1 and 3 and drop the 1997 entry cohort
keep if train_edu == 1 | train_edu == 3
drop if year_entry == 1997

* Show the experience distribution in the log, then remove negative values and
* the value 20
summarize potential_experience
drop if potential_experience < 0 | potential_experience == 20

********************************************************************************
* Merge in and define additional variables
********************************************************************************

*High-skill occupation designation (rows found only in the using file are not kept)
merge m:1 train_occ using "$temp/high_skill", keep(1 3) nogen

*Logged earnings and wage variables
generate earnings_ft_defl     = spell_length_ft * wage_ft_defl
generate log_earnings_ft_defl = log(earnings_ft_defl)

*East-West
tabulate region_entry, missing
* 1 for region codes 11-16, 0 otherwise, missing when the region is missing
generate east = inrange(region_entry,11,16) if !missing(region_entry) // counting Berlin as East

*Same region indicator
generate byte same_region = (region_current==region_entry)

********************************************************************************
* Obtain components needed for PDV calculations *
********************************************************************************

*Main decomposition baseline outcome (full period)
qui sum potential_experience if dom_job_ft == 1 & !missing(firm_fe) &  !missing(cd)
global maxe = r(max)
global maxfirste = 9 
global maxseconde = 19

* Table of mean earnings by experience level (kept for the log)
tabstat earnings_ft_defl if dom_job_ft == 1 & !missing(firm_fe) &  !missing(cd), by(potential_experience)

* Store the mean for experience level e in global earnings_ft_defl<e>.
* Each mean is computed for its level explicitly rather than read from the
* positional r(Stat#) results of tabstat: those are numbered by group order, so
* a missing experience level (or a minimum other than 0) would silently shift
* every later mean to the wrong level.
forval e = 0/$maxe {
	qui sum earnings_ft_defl if potential_experience == `e' & dom_job_ft == 1 & !missing(firm_fe) & !missing(cd)
	global earnings_ft_defl`e' = r(mean)
}

*Unemployment rate SD (over observations in the entry year)
su $U if year == year_entry, de
global sdu = r(sd) 

*Interest Rate (gross discount factor used in the PDV weights)
global R = 1.05

********************************************************************************
* Main Decomposition -- No Mediation
********************************************************************************
* For each outcome: regress it on experience-specific slopes of $U, then turn
* the slopes for experience 0..$maxfirste into one discounted, earnings-weighted
* effect of a one-SD change in $U (lincomest) and store it as <outcome>_1
* together with the number of firms, workers and worker-firm pairs.
gen nonfirm = log_earnings_ft_defl - firm_fe
local def_out 		"log_earnings_ft_defl firm_fe rents cd nonfirm"
local fe_absorb = 	"year_entry potential_experience year german gender train_end_age month_entry train_edu train_occ region_entry"

*Cumulation (first 10 years)
* The weights depend only on the globals (baseline earnings, $R, $sdu), not on
* the regression, so they are built once instead of once per outcome.
local denom = 0
forvalues e = 0/$maxfirste {
	local denom = `denom' + ${earnings_ft_defl`e'} *(($R )^(-`e'))	
}
* command_1: sum over e of earnings_e * (1 + sdu * slope_e) * 100 * R^(-e) / denom
local command_1 "0"
forvalues e = 0/$maxfirste {
	local exp = ${R}^(-`e')
	local command_1 = "`command_1' + ${earnings_ft_defl`e'} * (1 + $sdu * `e'.potential_experience#c.$U ) * 100 * ( `exp' / `denom')"
}

foreach outcome of varlist `def_out' {

	*Regression
	reghdfe `outcome' i.potential_experience#c.${U} if !missing(firm_fe) & !missing(cd) & dom_job_ft == 1, absorb(`fe_absorb') vce(cl region_entry)

	*Sample counts for the results table
	distinct betnr if e(sample)
	local firms = r(ndistinct)
	distinct persnr if e(sample)
	local workers = r(ndistinct)
	distinct persnr betnr if e(sample)
	local pairs = r(ndistinct)

	*Subtracting 100 expresses the weighted sum as a percent change
	eststo `outcome'_1: lincomest `command_1' - 100
	estadd sca pairs = `pairs' : `outcome'_1
	estadd sca firms = `firms' : `outcome'_1
	estadd sca workers = `workers' : `outcome'_1

}
	


*FINAL RESULTS for a given combination
* One column per outcome (earnings, firm_fe, rents, cd, nonfirm) -- this layout
* is transposed relative to the paper
esttab log_earnings_ft_defl_1 firm_fe_1 rents_1 cd_1 nonfirm_1, ///
	title("FINAL FIRST DECADE RESULTS FOR: NON-Mediated ${U}; AKM-Value window = ${windows}") ///
	b(2) se(2) star(* 0.1 ** .05 *** 0.01) ///
	mtitles scalars(firms workers pairs)

* Discard the stored results before the mediated specification reuses the names
eststo clear


********************************************************************************
* Main Decomposition -- With Mediation
********************************************************************************
* Build one mediation regressor per experience level: pe<i>_<U_med> equals
* $U_med when potential_experience == i and 0 otherwise. The first level with a
* regressor depends on the chosen measure.

if "${U_med}" == "U23" {
	local start = 2
}
else if "${U_med}" == "U345" {
	local start = 3
}
else {
	* `start' would otherwise be empty and the loop below would fail with an
	* uninformative syntax error
	display as error "mediation measure '${U_med}' is not supported (expected U23 or U345)"
	exit 198
}


forval i = `start'/19 {
	gen pe`i'_${U_med} = 0
	replace pe`i'_${U_med} = ${U_med} if potential_experience == `i'
}


* Reduce memory use before the larger mediated regressions
compress

local def_out 		"log_earnings_ft_defl firm_fe rents cd nonfirm"
local fe_absorb = 	"year_entry potential_experience year german gender train_end_age month_entry train_edu train_occ region_entry"

di "`fe_absorb'"

*Cumulation (first 10 years)
* The weights depend only on the globals (baseline earnings, $R, $sdu), not on
* the regression, so they are built once instead of once per outcome.
local denom = 0
forvalues e = 0/$maxfirste {
	local denom = `denom' + ${earnings_ft_defl`e'} *(($R )^(-`e'))	
}
* command_1: sum over e of earnings_e * (1 + sdu * slope_e) * 100 * R^(-e) / denom
local command_1 "0"
forvalues e = 0/$maxfirste {
	local exp = ${R}^(-`e')
	local command_1 = "`command_1' + ${earnings_ft_defl`e'} * (1 + $sdu * `e'.potential_experience#c.$U ) * 100 * ( `exp' / `denom')"
}

foreach outcome of varlist `def_out' {

	*Regression: experience-specific slopes of $U plus the mediation regressors
	reghdfe `outcome' i.potential_experience#c.${U} pe*_${U_med}, absorb(`fe_absorb') vce(cl region_entry) poolsize(1) compact

	*Sample counts for the results table
	distinct betnr if e(sample)
	local firms = r(ndistinct)
	distinct persnr if e(sample)
	local workers = r(ndistinct)
	distinct persnr betnr if e(sample)
	local pairs = r(ndistinct)

	*Subtracting 100 expresses the weighted sum as a percent change
	eststo `outcome'_1: lincomest `command_1' - 100  
	estadd sca pairs = `pairs' : `outcome'_1
	estadd sca firms = `firms' : `outcome'_1
	estadd sca workers = `workers' : `outcome'_1

}
	
*FINAL RESULTS for a given combination
* One column per outcome (earnings, firm_fe, rents, cd, nonfirm) -- this layout
* is transposed relative to the paper
esttab log_earnings_ft_defl_1 firm_fe_1 rents_1 cd_1 nonfirm_1, ///
	title("FINAL FIRST DECADE RESULTS FOR: MEDIATED ${U}; AKM-Value window = ${windows}") ///
	b(2) se(2) star(* 0.1 ** .05 *** 0.01) ///
	mtitles scalars(firms workers pairs)
	
}


* Tab. E4
* Regress U_internal on its own lags, adding one further lag per specification
* (orders 1 to 5), with region_entry and year absorbed, standard errors
* clustered by region_entry, and the sample restricted to 1998 onwards.
use "$temp/U_internal.dta", clear
xtset region_entry year

local lags ""
forvalues k = 1/5 {
	* cumulative regressor list: L1 ... Lk
	local lags "`lags' L`k'.U_internal"
	eststo order`k': reghdfe U_internal `lags' if year>=1998, absorb(region_entry year) vce(cluster region_entry)
}

esttab order*, b(2) se(2) scalars(r2_within) sfmt(2) nocons

* Leave a clean session behind
clear
capture log close
